In [1]:
import pandas as pd
df = pd.read_csv('RRR.L 2.csv')
print(df.head())
print(df.describe())
print(df.isnull().sum())
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
       Date  Close
0  1/1/2019  0.575
1  1/2/2019  0.600
2  1/3/2019  0.600
3  1/4/2019  0.525
4  1/5/2019  0.575
           Close
count  60.000000
mean    0.483467
std     0.253323
min     0.105000
25%     0.308750
50%     0.475000
75%     0.581250
max     1.100000
Date     0
Close    0
dtype: int64

Activity 1¶

Exploratory Data Analysis and Visualization¶

In [9]:
import matplotlib.pyplot as plt


plt.figure(figsize=(10, 6))
plt.plot(df['Close'], label='Close Price')
plt.title('Time Series of Close Price')
plt.xlabel('Date')
plt.ylabel('Close Price')


for i, (date, close) in enumerate(df['Close'].items()):
    if i % 30 == 0:  
        plt.annotate(f'{close:.2f}', (date, close), textcoords="offset points", xytext=(0, 10), ha='center')

plt.legend()
plt.show()
No description has been provided for this image
In [8]:
import matplotlib.pyplot as plt


rolling_mean = df['Close'].rolling(window=12).mean()
rolling_std = df['Close'].rolling(window=12).std()


plt.figure(figsize=(12, 6))
plt.plot(df['Close'], color='blue', label='Close Price')
plt.plot(rolling_mean, color='red', label='Rolling Mean')
plt.plot(rolling_std, color='black', label='Rolling Std Dev')


for x, y in zip(df.index, df['Close']):
    plt.text(x, y, f'{y:.2f}', fontsize=8, ha='center', va='bottom', color='blue')
    
for x, y in zip(df.index, rolling_mean):
    if not pd.isna(y):  
        plt.text(x, y, f'{y:.2f}', fontsize=8, ha='center', va='bottom', color='red')
        
for x, y in zip(df.index, rolling_std):
    if not pd.isna(y): 
        plt.text(x, y, f'{y:.2f}', fontsize=8, ha='center', va='bottom', color='black')

plt.title('Rolling Mean & Standard Deviation')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()
No description has been provided for this image
In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf


fig, ax = plt.subplots(figsize=(12, 6))
plot_acf(df['Close'], ax=ax, lags=40, alpha=0.05)


for i in range(1, 41):  
    y = np.corrcoef(df['Close'][:-i], df['Close'][i:])[0, 1]
    ax.annotate(f'{y:.2f}', xy=(i, y), xytext=(i, y + 0.05),
                arrowprops=dict(facecolor='black', arrowstyle='->'),
                fontsize=9, ha='center')

plt.title('Autocorrelation Plot of Close Price')
plt.xlabel('Lags')
plt.ylabel('Autocorrelation')
plt.show()
No description has been provided for this image

Time Series Decomposition¶

In [11]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from statsmodels.tsa.seasonal import seasonal_decompose
result = seasonal_decompose(df['Close'], model='additive', period=12)
observed = result.observed
trend = result.trend
seasonal = result.seasonal
resid = result.resid

# Create subplots
fig = make_subplots(rows=4, cols=1, shared_xaxes=True, subplot_titles=['Observed', 'Trend', 'Seasonal', 'Residual'])
fig.add_trace(go.Scatter(x=observed.index, y=observed, mode='lines', name='Observed'), row=1, col=1)
fig.add_trace(go.Scatter(x=trend.index, y=trend, mode='lines', name='Trend'), row=2, col=1)
fig.add_trace(go.Scatter(x=seasonal.index, y=seasonal, mode='lines', name='Seasonal'), row=3, col=1)
fig.add_trace(go.Scatter(x=resid.index, y=resid, mode='lines', name='Residual'), row=4, col=1)
fig.update_layout(height=800, title='Time Series Decomposition', showlegend=False)

fig.show()

Naive method¶

In [14]:
naive_forecast = df['Close'].shift(1)

# Plot actual vs forecast
plt.figure(figsize=(10, 6))
plt.plot(df['Close'], label='Actual')
plt.plot(naive_forecast, label='Naive Forecast', linestyle='--')
plt.title('Naive Forecast vs Actual')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()

# Calculate accuracy metrics for Naive Method
from sklearn.metrics import mean_absolute_error, mean_squared_error

naive_mae = mean_absolute_error(df['Close'][1:], naive_forecast[1:])
naive_mse = mean_squared_error(df['Close'][1:], naive_forecast[1:])
naive_rmse = np.sqrt(naive_mse)

print(f'Naive Method MAE: {naive_mae}')
print(f'Naive Method MSE: {naive_mse}')
print(f'Naive Method RMSE: {naive_rmse}')
No description has been provided for this image
Naive Method MAE: 0.06966101694915253
Naive Method MSE: 0.0107091186440678
Naive Method RMSE: 0.10348487157100693

Average Historical Method¶

In [15]:
# Average historical method
average_forecast = [df['Close'][:i].mean() for i in range(1, len(df)+1)]

# Plot actual vs forecast
plt.figure(figsize=(10, 6))
plt.plot(df['Close'], label='Actual')
plt.plot(df.index, average_forecast, label='Average Historical Forecast', linestyle='--')
plt.title('Average Historical Forecast vs Actual')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()

# Calculate accuracy metrics for Average Historical Method
average_mae = mean_absolute_error(df['Close'], average_forecast)
average_mse = mean_squared_error(df['Close'], average_forecast)
average_rmse = np.sqrt(average_mse)

print(f'Average Historical Method MAE: {average_mae}')
print(f'Average Historical Method MSE: {average_mse}')
print(f'Average Historical Method RMSE: {average_rmse}')
No description has been provided for this image
Average Historical Method MAE: 0.20028654805085913
Average Historical Method MSE: 0.06101835414212262
Average Historical Method RMSE: 0.2470189347846084

Activity 2¶

Time Series Decomposition¶

In [20]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose

result_additive = seasonal_decompose(df['Close'], model='additive', period=12)
result_multiplicative = seasonal_decompose(df['Close'], model='multiplicative', period=12)
In [21]:
plt.figure(figsize=(12, 8))
plt.subplot(2, 1, 1)
result_additive.trend.plot(ax=plt.gca(), title='Additive Decomposition')
plt.ylabel('Trend')
plt.subplot(2, 1, 2)
result_additive.seasonal.plot(ax=plt.gca())
plt.ylabel('Seasonal')
plt.tight_layout()
plt.show()
No description has been provided for this image
In [22]:
plt.figure(figsize=(12, 8))
plt.subplot(2, 1, 1)
result_multiplicative.trend.plot(ax=plt.gca(), title='Multiplicative Decomposition')
plt.ylabel('Trend')
plt.subplot(2, 1, 2)
result_multiplicative.seasonal.plot(ax=plt.gca())
plt.ylabel('Seasonal')
plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
 
In [ ]:
 
In [ ]:
 

Simple Average Method¶

In [24]:
simple_average_forecast = df['Close'].mean()


plt.figure(figsize=(10, 6))
plt.plot(df.index, df['Close'], label='Actual')
plt.axhline(y=simple_average_forecast, color='r', linestyle='--', label='Simple Average Forecast')
plt.title('Simple Average Forecast vs Actual')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()


from sklearn.metrics import mean_absolute_error, mean_squared_error

simple_average_mae = mean_absolute_error(df['Close'], [simple_average_forecast] * len(df))
simple_average_mse = mean_squared_error(df['Close'], [simple_average_forecast] * len(df))
simple_average_rmse = np.sqrt(simple_average_mse)

print(f'Simple Average Method MAE: {simple_average_mae}')
print(f'Simple Average Method MSE: {simple_average_mse}')
print(f'Simple Average Method RMSE: {simple_average_rmse}')
No description has been provided for this image
Simple Average Method MAE: 0.19454666666666662
Simple Average Method MSE: 0.06310288222222223
Simple Average Method RMSE: 0.2512028706488487
In [ ]:
 

Exponential Smoothing methods¶

In [29]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
In [31]:
# Single Exponential Smoothing
ses_model = ExponentialSmoothing(df['Close']).fit(optimized=True)
ses_forecast = ses_model.forecast(len(df))

# Holt's Linear Exponential Smoothing
holt_model = ExponentialSmoothing(df['Close'], trend='add').fit(optimized=True)
holt_forecast = holt_model.forecast(len(df))

# Holt-Winters Exponential Smoothing
hw_model = ExponentialSmoothing(df['Close'], trend='add', seasonal='add', seasonal_periods=12).fit(optimized=True)
hw_forecast = hw_model.forecast(len(df))
In [32]:
# Plot actual vs forecast - Single Exponential Smoothing
plt.figure(figsize=(10, 6))
plt.plot(df.index, df['Close'], label='Actual')
plt.plot(df.index, ses_forecast, label='SES Forecast', linestyle='--')
plt.title('SES Forecast vs Actual')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()
No description has been provided for this image
In [34]:
# Holt's Linear Exponential Smoothing
holt_model = ExponentialSmoothing(df['Close'], trend='add').fit(optimized=True)
holt_forecast = holt_model.forecast(len(df))

# Plot actual vs forecast - Holt's Linear Exponential Smoothing
plt.figure(figsize=(10, 6))
plt.plot(df.index, df['Close'], label='Actual')
plt.plot(df.index, holt_forecast, label="Holt's Linear Forecast", linestyle='--')
plt.title("Holt's Linear Forecast vs Actual")
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()
No description has been provided for this image
In [35]:
# Holt-Winters Exponential Smoothing
hw_model = ExponentialSmoothing(df['Close'], trend='add', seasonal='add', seasonal_periods=12).fit(optimized=True)
hw_forecast = hw_model.forecast(len(df))

# Plot actual vs forecast - Holt-Winters Exponential Smoothing
plt.figure(figsize=(10, 6))
plt.plot(df.index, df['Close'], label='Actual')
plt.plot(df.index, hw_forecast, label="Holt-Winters Forecast", linestyle='--')
plt.title("Holt-Winters Forecast vs Actual")
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()
No description has been provided for this image
In [36]:
# Calculatng accuracy metrics for Exponential Smoothing Methods
ses_mae = mean_absolute_error(df['Close'], ses_forecast)
ses_mse = mean_squared_error(df['Close'], ses_forecast)
ses_rmse = np.sqrt(ses_mse)

holt_mae = mean_absolute_error(df['Close'], holt_forecast)
holt_mse = mean_squared_error(df['Close'], holt_forecast)
holt_rmse = np.sqrt(holt_mse)

hw_mae = mean_absolute_error(df['Close'], hw_forecast)
hw_mse = mean_squared_error(df['Close'], hw_forecast)
hw_rmse = np.sqrt(hw_mse)

print(f'SES Method MAE: {ses_mae}')
print(f'SES Method MSE: {ses_mse}')
print(f'SES Method RMSE: {ses_rmse}')

print(f"Holt's Linear Method MAE: {holt_mae}")
print(f"Holt's Linear Method MSE: {holt_mse}")
print(f"Holt's Linear Method RMSE: {holt_rmse}")

print(f'Holt-Winters Method MAE: {hw_mae}')
print(f'Holt-Winters Method MSE: {hw_mse}')
SES Method MAE: 0.3784666665226221
SES Method MSE: 0.20633989988720813
SES Method RMSE: 0.4542465188498511
Holt's Linear Method MAE: 0.6223080030322905
Holt's Linear Method MSE: 0.4385011076389994
Holt's Linear Method RMSE: 0.6621941615863125
Holt-Winters Method MAE: 0.6659882542623528
Holt-Winters Method MSE: 0.49333830188703975

Activity 3¶

Time Series Stationarity Test and Differencing¶

In [49]:
from statsmodels.tsa.stattools import adfuller


result = adfuller(df['Close'])
print('ADF Statistic (Original Series):', result[0])
print('p-value (Original Series):', result[1])
print('Critical Values:')
for key, value in result[4].items():
    print('\t', key, ':', value)


if result[1] > 0.05:
    try:
        df['Close_diff'] = df['Close'].diff().dropna()
        result_diff = adfuller(df['Close_diff'])
        print('ADF Statistic (After differencing):', result_diff[0])
        print('p-value (After differencing):', result_diff[1])
    except Exception as e:
        print("E:", e)
ADF Statistic (Original Series): -1.2420817124279229
p-value (Original Series): 0.6552364663643981
Critical Values:
	 1% : -3.5463945337644063
	 5% : -2.911939409384601
	 10% : -2.5936515282964665
E: exog contains inf or nans
In [ ]:
 

ACF and PACF¶

In [63]:
from statsmodels.tsa.stattools import pacf, acf
import matplotlib.pyplot as plt


acf_values = acf(df['Close_diff'].dropna(), nlags=20)
pacf_values = pacf(df['Close_diff'].dropna(), nlags=20)


plt.figure(figsize=(12, 6))
plt.subplot(2, 1, 1)
plt.plot(range(len(acf_values)), acf_values, marker='o', linestyle='-', color='red')
plt.title('Autocorrelation Function (ACF)')
plt.xlabel('Lag')
plt.ylabel('ACF')
plt.grid(True)


for i, txt in enumerate(acf_values):
    plt.annotate(f'{txt:.2f}', (i, acf_values[i]), textcoords="offset points", xytext=(0,10), ha='center')


plt.subplot(2, 1, 2)
plt.plot(range(len(pacf_values)), pacf_values, marker='o', linestyle='-', color='blue')
plt.title('Partial Autocorrelation Function (PACF)')
plt.xlabel('Lag')
plt.ylabel('PACF')
plt.grid(True)


for i, txt in enumerate(pacf_values):
    plt.annotate(f'{txt:.2f}', (i, pacf_values[i]), textcoords="offset points", xytext=(0,10), ha='center')

plt.tight_layout()
plt.show()
No description has been provided for this image

ARIMA Model and forecast¶

In [65]:
from statsmodels.tsa.arima.model import ARIMA


p = 1  # AR term
d = 1  # Differencing
q = 1  # MA term

model = ARIMA(df['Close'], order=(p, d, q)).fit()


print(model.summary())
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                  Close   No. Observations:                   60
Model:                 ARIMA(1, 1, 1)   Log Likelihood                  50.188
Date:                Mon, 03 Jun 2024   AIC                            -94.377
Time:                        19:36:43   BIC                            -88.144
Sample:                             0   HQIC                           -91.944
                                 - 60                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.1823      2.664     -0.068      0.945      -5.403       5.039
ma.L1          0.2312      2.653      0.087      0.931      -4.969       5.431
sigma2         0.0107      0.001      9.589      0.000       0.008       0.013
===================================================================================
Ljung-Box (L1) (Q):                   0.01   Jarque-Bera (JB):                95.78
Prob(Q):                              0.92   Prob(JB):                         0.00
Heteroskedasticity (H):               0.38   Skew:                             1.81
Prob(H) (two-sided):                  0.04   Kurtosis:                         8.08
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [69]:
forecast_period = 10  


forecast = model.forecast(steps=forecast_period)


plt.figure(figsize=(10, 6))
plt.plot(df.index, df['Close'], label='Actual')
plt.plot(pd.date_range(start=df.index[-1], periods=forecast_period + 1, freq='M')[1:], forecast, label='ARIMA Forecast', linestyle='--')
plt.title('ARIMA Forecast')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()
No description has been provided for this image
In [75]:
arima_forecast = model.forecast(steps=forecast_period)


arima_mae = mean_absolute_error(df['Close'][-forecast_period:], arima_forecast)
arima_mse = mean_squared_error(df['Close'][-forecast_period:], arima_forecast)
arima_rmse = np.sqrt(arima_mse)

print(f'ARIMA Method MAE: {arima_mae}')
print(f'ARIMA Method MSE: {arima_mse}')
print(f'ARIMA Method RMSE: {arima_rmse}')
ARIMA Method MAE: 0.07653107886887411
ARIMA Method MSE: 0.010287655980626299
ARIMA Method RMSE: 0.10142808280070317

Sarima model and forecast¶

In [71]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
In [72]:
from statsmodels.tsa.statespace.sarimax import SARIMAX


p = 1  # AR order
d = 1  # Differencing
q = 1  # MA order
P = 1  # Seasonal AR order
D = 1  # Seasonal differencing
Q = 1  # Seasonal MA order
m = 12  

sarima_model = SARIMAX(df['Close'], order=(p, d, q), seasonal_order=(P, D, Q, m)).fit()


print(sarima_model.summary())
                                     SARIMAX Results                                      
==========================================================================================
Dep. Variable:                              Close   No. Observations:                   60
Model:             SARIMAX(1, 1, 1)x(1, 1, 1, 12)   Log Likelihood                  31.516
Date:                            Mon, 03 Jun 2024   AIC                            -53.032
Time:                                    19:39:45   BIC                            -43.782
Sample:                                         0   HQIC                           -49.551
                                             - 60                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.7297      1.883      0.388      0.698      -2.961       4.421
ma.L1         -0.6907      1.923     -0.359      0.720      -4.460       3.079
ar.S.L12      -0.5207      0.187     -2.783      0.005      -0.887      -0.154
ma.S.L12      -0.3281      0.380     -0.863      0.388      -1.073       0.417
sigma2         0.0127      0.004      3.520      0.000       0.006       0.020
===================================================================================
Ljung-Box (L1) (Q):                   0.01   Jarque-Bera (JB):                 1.30
Prob(Q):                              0.92   Prob(JB):                         0.52
Heteroskedasticity (H):               0.72   Skew:                             0.29
Prob(H) (two-sided):                  0.53   Kurtosis:                         3.57
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [76]:
# Plot SARIMA forecast
plt.figure(figsize=(10, 6))
plt.plot(df.index, df['Close'], label='Actual')
plt.plot(pd.date_range(start=df.index[-1], periods=forecast_period + 1, freq='M')[1:], sarima_forecast, label='SARIMA Forecast', linestyle='--')
plt.title('SARIMA Forecast')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()
No description has been provided for this image
In [74]:
# Forecast with SARIMA model
forecast_period = 12  # Example: forecast for the next 12 periods
sarima_forecast = sarima_model.forecast(steps=forecast_period)

# Calculate accuracy metrics for SARIMA model
sarima_mae = mean_absolute_error(df['Close'][-forecast_period:], sarima_forecast)
sarima_mse = mean_squared_error(df['Close'][-forecast_period:], sarima_forecast)
sarima_rmse = np.sqrt(sarima_mse)

print(f'SARIMA Method MAE: {sarima_mae}')
print(f'SARIMA Method MSE: {sarima_mse}')
print(f'SARIMA Method RMSE: {sarima_rmse}')
SARIMA Method MAE: 0.20876680891557578
SARIMA Method MSE: 0.04523539372534637
SARIMA Method RMSE: 0.21268613900615707
In [ ]: